In [1]:
import numpy as np
import pandas as pd
In [2]:
df = pd.read_csv("heart_disease_uci.csv")
In [3]:
df.head()
Out[3]:
id age sex dataset cp trestbps chol fbs restecg thalch exang oldpeak slope ca thal num
0 1 63 Male Cleveland typical angina 145.0 233.0 True lv hypertrophy 150.0 False 2.3 downsloping 0.0 fixed defect 0
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False lv hypertrophy 108.0 True 1.5 flat 3.0 normal 2
2 3 67 Male Cleveland asymptomatic 120.0 229.0 False lv hypertrophy 129.0 True 2.6 flat 2.0 reversable defect 1
3 4 37 Male Cleveland non-anginal 130.0 250.0 False normal 187.0 False 3.5 downsloping 0.0 normal 0
4 5 41 Female Cleveland atypical angina 130.0 204.0 False lv hypertrophy 172.0 False 1.4 upsloping 0.0 normal 0
In [4]:
df.tail()
Out[4]:
id age sex dataset cp trestbps chol fbs restecg thalch exang oldpeak slope ca thal num
915 916 54 Female VA Long Beach asymptomatic 127.0 333.0 True st-t abnormality 154.0 False 0.0 NaN NaN NaN 1
916 917 62 Male VA Long Beach typical angina NaN 139.0 False st-t abnormality NaN NaN NaN NaN NaN NaN 0
917 918 55 Male VA Long Beach asymptomatic 122.0 223.0 True st-t abnormality 100.0 False 0.0 NaN NaN fixed defect 2
918 919 58 Male VA Long Beach asymptomatic NaN 385.0 True lv hypertrophy NaN NaN NaN NaN NaN NaN 0
919 920 62 Male VA Long Beach atypical angina 120.0 254.0 False lv hypertrophy 93.0 True 0.0 NaN NaN NaN 1
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        920 non-null    int64  
 1   age       920 non-null    int64  
 2   sex       920 non-null    object 
 3   dataset   920 non-null    object 
 4   cp        920 non-null    object 
 5   trestbps  861 non-null    float64
 6   chol      890 non-null    float64
 7   fbs       830 non-null    object 
 8   restecg   918 non-null    object 
 9   thalch    865 non-null    float64
 10  exang     865 non-null    object 
 11  oldpeak   858 non-null    float64
 12  slope     611 non-null    object 
 13  ca        309 non-null    float64
 14  thal      434 non-null    object 
 15  num       920 non-null    int64  
dtypes: float64(5), int64(3), object(8)
memory usage: 115.1+ KB
In [6]:
print(f"Record: {df.shape[0]}")
print(f"Columns : {df.shape[1]}")
Record: 920
Columns : 16
In [7]:
df.shape
Out[7]:
(920, 16)
In [8]:
df.isna().sum()
Out[8]:
id            0
age           0
sex           0
dataset       0
cp            0
trestbps     59
chol         30
fbs          90
restecg       2
thalch       55
exang        55
oldpeak      62
slope       309
ca          611
thal        486
num           0
dtype: int64
In [9]:
duplicates = df[df.duplicated()].value_counts()
print(duplicates)
Series([], Name: count, dtype: int64)
In [10]:
duplicates_all = df[df.duplicated(keep=False)]
print(duplicates_all)
Empty DataFrame
Columns: [id, age, sex, dataset, cp, trestbps, chol, fbs, restecg, thalch, exang, oldpeak, slope, ca, thal, num]
Index: []
In [11]:
remove_duplicates = df.drop_duplicates()
print(remove_duplicates)
      id  age     sex        dataset               cp  trestbps   chol    fbs  \
0      1   63    Male      Cleveland   typical angina     145.0  233.0   True   
1      2   67    Male      Cleveland     asymptomatic     160.0  286.0  False   
2      3   67    Male      Cleveland     asymptomatic     120.0  229.0  False   
3      4   37    Male      Cleveland      non-anginal     130.0  250.0  False   
4      5   41  Female      Cleveland  atypical angina     130.0  204.0  False   
..   ...  ...     ...            ...              ...       ...    ...    ...   
915  916   54  Female  VA Long Beach     asymptomatic     127.0  333.0   True   
916  917   62    Male  VA Long Beach   typical angina       NaN  139.0  False   
917  918   55    Male  VA Long Beach     asymptomatic     122.0  223.0   True   
918  919   58    Male  VA Long Beach     asymptomatic       NaN  385.0   True   
919  920   62    Male  VA Long Beach  atypical angina     120.0  254.0  False   

              restecg  thalch  exang  oldpeak        slope   ca  \
0      lv hypertrophy   150.0  False      2.3  downsloping  0.0   
1      lv hypertrophy   108.0   True      1.5         flat  3.0   
2      lv hypertrophy   129.0   True      2.6         flat  2.0   
3              normal   187.0  False      3.5  downsloping  0.0   
4      lv hypertrophy   172.0  False      1.4    upsloping  0.0   
..                ...     ...    ...      ...          ...  ...   
915  st-t abnormality   154.0  False      0.0          NaN  NaN   
916  st-t abnormality     NaN    NaN      NaN          NaN  NaN   
917  st-t abnormality   100.0  False      0.0          NaN  NaN   
918    lv hypertrophy     NaN    NaN      NaN          NaN  NaN   
919    lv hypertrophy    93.0   True      0.0          NaN  NaN   

                  thal  num  
0         fixed defect    0  
1               normal    2  
2    reversable defect    1  
3               normal    0  
4               normal    0  
..                 ...  ...  
915                NaN    1  
916                NaN    0  
917       fixed defect    2  
918                NaN    0  
919                NaN    1  

[920 rows x 16 columns]
In [12]:
df.shape
Out[12]:
(920, 16)
In [13]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.heatmap(df.isnull(),cbar = False)
plt.title("missing values")
plt.show()
In [14]:
def fill_na(df):
    for column in df.columns:
        if df[column].isna().sum() > 0:
            if df[column].dtype in ["int64","float64"]:
                df[column].fillna(round(df[column].median(),1),inplace = True)
            else:
                df[column].fillna(df[column].mode()[0],inplace = True)
                
In [15]:
fill_na(df)
In [16]:
df.isna().sum()
Out[16]:
id          0
age         0
sex         0
dataset     0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalch      0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
num         0
dtype: int64
In [17]:
top_legend = df["cp"].value_counts()
print(top_legend)
cp
asymptomatic       496
non-anginal        204
atypical angina    174
typical angina      46
Name: count, dtype: int64
In [18]:
top_legend = df["cp"].value_counts().nlargest(4).index
print(top_legend)
Index(['asymptomatic', 'non-anginal', 'atypical angina', 'typical angina'], dtype='object', name='cp')
In [19]:
top_legend = df["cp"].value_counts().nlargest(4).index
display(top_legend)
plt.figure(figsize = (15,6))
sns.scatterplot(x = "age",y = "chol",data = df[df["cp"].isin(top_legend)],hue = "cp")
plt.title("age vs chlo scatter plot")
plt.xlabel("age")
plt.ylabel("chol")
plt.legend(title = "chest pain type",bbox_to_anchor = (1.05,1),loc = "upper left")
plt.show()
Index(['asymptomatic', 'non-anginal', 'atypical angina', 'typical angina'], dtype='object', name='cp')
In [20]:
# SCATTER PLOT IN AGE 
top_legend = df["sex"].value_counts().nlargest(4).index
display(top_legend)
plt.figure(figsize = (15,6))
sns.scatterplot(x = "age",y = "chol",data = df[df["sex"].isin(top_legend)],hue = "sex")
plt.title("age vs chlo scatter plot")
plt.xlabel("age")
plt.ylabel("chol")
plt.legend(title = "gender type",bbox_to_anchor = (1.05,1),loc = "upper left")
plt.show()
Index(['Male', 'Female'], dtype='object', name='sex')
In [21]:
# Numerical Columns
numerical_cols = ["age","chol","trestbps","thalch","oldpeak","ca","num"]
for col in numerical_cols:
    # figure size
    plt.figure(figsize = (15,6))
    # subplot in hist 
    plt.subplot(1,2,1)
    sns.histplot(data = df[col],kde = True,color = "skyblue")
    plt.title(f"Distibution of {col}")
    plt.show()
    # subplot in boxplot
    plt.subplot(1,2,2)
    sns.boxplot(x = col, data = df, color = "purple")
    plt.title(f"Box plot of {col}")
    plt.tight_layout()
    plt.show()
    
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
In [22]:
# categorical columns 
categorical_col = ["sex","dataset","cp","fbs","restecg","exang","slope","thal"]
for col in categorical_col:
    # set the figure shape and size
    plt.figure(figsize = (15,4))
    # subplot 
    sns.countplot(x = col, data = df, order = df[col].value_counts().index, palette='Set2')
    plt.xticks(rotation = 90)
    plt.title(f"Count of {col}")
    plt.tight_layout()
    plt.show()
In [23]:
import plotly.express as px
fig = px.scatter(df,x= "age",y= "chol",color = "sex")
fig.update_layout(width = 1000,height = 800)
fig.update_layout(title = "scatter plot ofAge vs chol (colored by gender)")
fig.show()
In [24]:
from plotly.offline import iplot
plt = px.box(x = df["age"],
            labels = {"x" : "age"},
            title = "5 - number summary of (box plot) age")
plt.show()
In [25]:
fig = px.box(x = df["oldpeak"],
            labels = {"x" :"oldpeak"},
            title = "5 - Number summary of (box plot) oldpeak")
fig.show()
In [26]:
import plotly.express as px
fig = px.scatter(df,x = "chol",y = "age", color = "cp",size_max = 30, hover_name = "exang")
fig.update_layout(width = 1000,height = 500)
fig.update_layout(title_text = "scatter plot age vs chol (colored by old peak)")
fig.show()
In [27]:
import plotly.express as px

fig = px.scatter(df, x="chol", y="age", color="cp",size_max=30, hover_name="exang")
fig.update_layout(width=1000, height=500)
fig.update_layout(title_text="scatter plot age vs chol (colored by old peak)")
fig.show()
In [28]:
import plotly.express as px

fig = px.scatter(df, x='chol', y='age', color='cp',size_max = 30, hover_name = 'exang')
fig.update_layout(width=1000, height=500)
fig.update_layout(title_text='Scatter Plot of Cholesterol vs. Age (colored by cp)')
fig.show()
In [29]:
df_clean = df[df['oldpeak'].notna() & (df['oldpeak'] >= 0)]
print(df_clean)

import plotly.express as px
fig = px.scatter(
    df_clean,
    x='chol',
    y='age',
    color='cp',
    size='oldpeak',
    size_max=30,
    hover_name='exang'
)
fig.update_layout(width=1000, height=800)
fig.update_layout(title_text='Scatter Plot of Cholesterol vs. Age (colored by cp)')
fig.show()
      id  age     sex        dataset               cp  trestbps   chol    fbs  \
0      1   63    Male      Cleveland   typical angina     145.0  233.0   True   
1      2   67    Male      Cleveland     asymptomatic     160.0  286.0  False   
2      3   67    Male      Cleveland     asymptomatic     120.0  229.0  False   
3      4   37    Male      Cleveland      non-anginal     130.0  250.0  False   
4      5   41  Female      Cleveland  atypical angina     130.0  204.0  False   
..   ...  ...     ...            ...              ...       ...    ...    ...   
915  916   54  Female  VA Long Beach     asymptomatic     127.0  333.0   True   
916  917   62    Male  VA Long Beach   typical angina     130.0  139.0  False   
917  918   55    Male  VA Long Beach     asymptomatic     122.0  223.0   True   
918  919   58    Male  VA Long Beach     asymptomatic     130.0  385.0   True   
919  920   62    Male  VA Long Beach  atypical angina     120.0  254.0  False   

              restecg  thalch  exang  oldpeak        slope   ca  \
0      lv hypertrophy   150.0  False      2.3  downsloping  0.0   
1      lv hypertrophy   108.0   True      1.5         flat  3.0   
2      lv hypertrophy   129.0   True      2.6         flat  2.0   
3              normal   187.0  False      3.5  downsloping  0.0   
4      lv hypertrophy   172.0  False      1.4    upsloping  0.0   
..                ...     ...    ...      ...          ...  ...   
915  st-t abnormality   154.0  False      0.0         flat  0.0   
916  st-t abnormality   140.0  False      0.5         flat  0.0   
917  st-t abnormality   100.0  False      0.0         flat  0.0   
918    lv hypertrophy   140.0  False      0.5         flat  0.0   
919    lv hypertrophy    93.0   True      0.0         flat  0.0   

                  thal  num  
0         fixed defect    0  
1               normal    2  
2    reversable defect    1  
3               normal    0  
4               normal    0  
..                 ...  ...  
915             normal    1  
916             normal    0  
917       fixed defect    2  
918             normal    0  
919             normal    1  

[908 rows x 16 columns]
In [30]:
fig = px.scatter(df_clean,
                x = "age",
                y = "chol",
                color = "cp",
                size = "ca"
                )
fig.update_layout(title = "scatter plot of age vs chol (colored by ca)",
                 width = 1000,
                 height = 800)
fig.show()
In [31]:
fig = px.scatter(df_clean,
                x = "age",
                y = "chol",
                color = "cp",
                size = "ca",
                hover_name = "oldpeak")
fig.update_layout(title = "scatter plot of age vs chol (colored by ca)",
                 width = 1000,
                 height = 800)
fig.show()
In [32]:
def remove_outliers_iqr(df,column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    # inter qutraile range
    IQR = Q3 - Q1
    # bounds
    lower_bound = Q1 - (1.5) * IQR
    upper_bound = Q3 + (1.5) * IQR
    filteded_data = [(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return filteded_data
In [33]:
column = ["chol","trestbps","thalch","oldpeak","ca"]
data_clean = remove_outliers_iqr(df,column)
print(data_clean)
[     chol  trestbps  thalch  oldpeak     ca
0    True      True    True     True   True
1    True      True    True     True  False
2    True      True    True     True  False
3    True      True    True     True   True
4    True      True    True     True   True
..    ...       ...     ...      ...    ...
915  True      True    True     True   True
916  True      True    True     True   True
917  True      True    True     True   True
918  True      True    True     True   True
919  True      True    True     True   True

[920 rows x 5 columns]]
In [53]:
# bar plot using plotly
import plotly.express as px
fig = px.bar(df,x = "age",y = "chol",hover_name = "num",color = "sex",height = 500)
fig.update_layout(title ="Bar plot of age wise chol count")
fig.show()
In [36]:
plt.figure(figsize=(8,5))
for col in df.columns:
    sns.kdeplot(df[col], label=col)

plt.title("Feature Distributions After Cleaning", fontsize=14)
plt.xlabel("Value")
plt.legend()
plt.show()
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[36], line 3
      1 plt.figure(figsize=(8,5))
      2 for col in df.columns:
----> 3     sns.kdeplot(df[col], label=col)
      5 plt.title("Feature Distributions After Cleaning", fontsize=14)
      6 plt.xlabel("Value")

File D:\Users\tamilarasi\Lib\site-packages\seaborn\distributions.py:1695, in kdeplot(data, x, y, hue, weights, palette, hue_order, hue_norm, color, fill, multiple, common_norm, common_grid, cumulative, bw_method, bw_adjust, warn_singular, log_scale, levels, thresh, gridsize, cut, clip, legend, cbar, cbar_ax, cbar_kws, ax, **kwargs)
   1692 if ax is None:
   1693     ax = plt.gca()
-> 1695 p._attach(ax, allowed_types=["numeric", "datetime"], log_scale=log_scale)
   1697 method = ax.fill_between if fill else ax.plot
   1698 color = _default_color(method, hue, color, kwargs)

File D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1211, in VectorPlotter._attach(self, obj, allowed_types, log_scale)
   1206     if var_type not in allowed_types:
   1207         err = (
   1208             f"The {var} variable is {var_type}, but one of "
   1209             f"{allowed_types} is required"
   1210         )
-> 1211         raise TypeError(err)
   1213 # -- Get axis objects for each row in plot_data for type conversions and scaling
   1215 facet_dim = {"x": "col", "y": "row"}

TypeError: The x variable is categorical, but one of ['numeric', 'datetime'] is required
In [37]:
import matplotlib.pyplot as plt
import seaborn as sns

# Select only numeric columns
numeric_cols = df.select_dtypes(include='number').columns

plt.figure(figsize=(8, 5))
for col in numeric_cols:
    sns.kdeplot(df[col], label=col)

plt.title("Feature Distributions After Cleaning", fontsize=14)
plt.xlabel("Value")
plt.legend()
plt.show()
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

Categorical Scatter Plots¶

In [38]:
sns.scatterplot(data = df,x = "cp",y = "chol",hue = "sex")
Out[38]:
<Axes: xlabel='cp', ylabel='chol'>
In [39]:
sns.stripplot(data = df,x = "cp",y = "age",hue = "sex",jitter = False)
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

Out[39]:
<Axes: xlabel='cp', ylabel='age'>
In [40]:
sns.catplot(data = df,x = "cp",y = "age",kind = "strip",hue = "sex")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

Out[40]:
<seaborn.axisgrid.FacetGrid at 0x17a6512e0d0>
In [41]:
sns.swarmplot(data = df,x = "cp",y = "age")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

10.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

Out[41]:
<Axes: xlabel='cp', ylabel='age'>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

24.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

In [42]:
sns.catplot(data = df,x = "cp",y = "chol",kind = "swarm")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

47.4% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

19.6% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

8.6% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

Out[42]:
<seaborn.axisgrid.FacetGrid at 0x17a63980c10>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

55.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

29.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

16.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

56.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

In [43]:
sns.swarmplot(data = df,x = "cp",y = "age",hue= "sex")
D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

10.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

Out[43]:
<Axes: xlabel='cp', ylabel='age'>
D:\Users\tamilarasi\Lib\site-packages\seaborn\categorical.py:3544: UserWarning:

24.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.

In [44]:
sns.boxplot(data = df,x = "cp",y = "age")
Out[44]:
<Axes: xlabel='cp', ylabel='age'>
In [45]:
sns.boxplot(data = df,x = "cp",y = "chol")
Out[45]:
<Axes: xlabel='cp', ylabel='chol'>
In [46]:
sns.boxplot(data = df,x= "cp",y = "age",hue = "sex")
Out[46]:
<Axes: xlabel='cp', ylabel='age'>
In [47]:
sns.boxplot(data = df,x = "cp",y = "chol",hue = "sex")
Out[47]:
<Axes: xlabel='cp', ylabel='chol'>
In [48]:
sns.boxplot(data = df,x = "restecg",y = "thalch",hue = "fbs",color = "yellow")
Out[48]:
<Axes: xlabel='restecg', ylabel='thalch'>
In [49]:
df.head()
Out[49]:
id age sex dataset cp trestbps chol fbs restecg thalch exang oldpeak slope ca thal num
0 1 63 Male Cleveland typical angina 145.0 233.0 True lv hypertrophy 150.0 False 2.3 downsloping 0.0 fixed defect 0
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False lv hypertrophy 108.0 True 1.5 flat 3.0 normal 2
2 3 67 Male Cleveland asymptomatic 120.0 229.0 False lv hypertrophy 129.0 True 2.6 flat 2.0 reversable defect 1
3 4 37 Male Cleveland non-anginal 130.0 250.0 False normal 187.0 False 3.5 downsloping 0.0 normal 0
4 5 41 Female Cleveland atypical angina 130.0 204.0 False lv hypertrophy 172.0 False 1.4 upsloping 0.0 normal 0
In [50]:
sns.boxplot(data = df,y = "oldpeak")
Out[50]:
<Axes: ylabel='oldpeak'>
In [51]:
 # Violinplot = (Boxplot + KDEplot)
sns.violinplot(data = df,x = "num",y = "age")
Out[51]:
<Axes: xlabel='num', ylabel='age'>
In [52]:
sns.violinplot(data = df,x = "cp",y = "age")
Out[52]:
<Axes: xlabel='cp', ylabel='age'>
In [54]:
sns.catplot(data = df, x = "cp",y = "age",kind = "violin",hue = "sex",split = True,color = "skyblue")
Out[54]:
<seaborn.axisgrid.FacetGrid at 0x17a65830c10>
In [55]:
sns.catplot(data = df,x = "cp",y = "age",kind = "violin",hue = "sex",color = "purple")
Out[55]:
<seaborn.axisgrid.FacetGrid at 0x17a65780090>

Categorical Estimate Plot¶

1.barplot¶

When there are multiple observations in each category, it also uses bootstrapping to¶

compute a confidence interval around the estimate, which is plotted using error bars¶

In [56]:
sns.barplot(data = df,x = "cp" ,y = "age")
Out[56]:
<Axes: xlabel='cp', ylabel='age'>
In [57]:
sns.barplot(data = df,x = "fbs",y = "age")
Out[57]:
<Axes: xlabel='fbs', ylabel='age'>
In [58]:
sns.barplot(data = df,x = "dataset",y = "chol")
Out[58]:
<Axes: xlabel='dataset', ylabel='chol'>
In [59]:
sns.barplot(data = df,x = "dataset",y = "age",hue = "sex",color = "purple")
Out[59]:
<Axes: xlabel='dataset', ylabel='age'>
In [60]:
sns.catplot(data = df,x = "sex",y = "age",hue = "fbs",kind = "box",color = "purple")
Out[60]:
<seaborn.axisgrid.FacetGrid at 0x17a66d7c550>
In [61]:
sns.regplot(data = df,x = "age",y = "chol",color = "red")
Out[61]:
<Axes: xlabel='age', ylabel='chol'>
In [62]:
sns.lmplot(data = df,x= "age",y = "chol",hue = "sex")
Out[62]:
<seaborn.axisgrid.FacetGrid at 0x17a67019390>
In [63]:
sns.lmplot(data = df,x = "ca",y = "num",hue = "fbs")
Out[63]:
<seaborn.axisgrid.FacetGrid at 0x17a66f5b790>
In [64]:
sns.residplot(data = df,x = "chol",y = "thalch")
Out[64]:
<Axes: xlabel='chol', ylabel='thalch'>
In [65]:
sns.catplot(data = df,x = "sex",y = "age",kind = "violin")
Out[65]:
<seaborn.axisgrid.FacetGrid at 0x17a6712f890>
In [66]:
sns.catplot(data = df,
           x = "sex",
           y = "chol",
           kind = "violin")
Out[66]:
<seaborn.axisgrid.FacetGrid at 0x17a671a8f90>
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: